# DBSCAN micro-seismic denoising and Monte Carlo fracture generation (and the manual process of elbow analysis)
import copy
import math
import random
import numpy as np
import matplotlib.pyplot as plt


def loadDataSet(fileName, splitChar):
    """
    load data from a file
    :param fileName: contain data
    :param splitChar:
    :return: data list
    """
    dataSet = []
    with open(fileName) as fr:
        for line in fr.readlines():
            curline = line.strip().split(splitChar)
            fltline = list(map(float, curline))
            dataSet.append(fltline)
    return dataSet


def writeDataSet(fileName, list, splitChar):
    """
    write a list to a file
    :param fileName: name of the file in which the data to be written
    :param list: the data list to be written
    :param splitChar: symbol for splitting data
    :return:
    """
    file = open(fileName, 'a')
    for item in list:
        file.write(str(item))
        if list.index(item) == len(list) - 1:
            file.write('\n')
        else:
            file.write(splitChar)
    file.close()
    return


def approximate465(anyj, t):
    """
    numerical approximate method
    :param anyj: the number would be approximated
    :param t: magnitude
    :return: numerical approximate result
    """
    if anyj * 10 ** t - int(anyj * 10 ** t) > 0.5:
        anyja = (int(anyj * 10 ** t) + 1) / 10 ** t
    elif anyj - int(anyj) < 0.5:
        anyja = (int(anyj * 10 ** t)) / 10 ** t
    else:
        if (int(anyj * 10 ** t) + 1) % 2 == 0:
            anyja = (int(anyj * 10 ** t) + 1) / 10 ** t
        else:
            anyja = (int(anyj * 10 ** t)) / 10 ** t
    return anyja


def proj(center, occurrence):
    """
    function: convert fracture parameters to analytical parameters
    :param center: the positioning point of a fracture
    :param occurrence: format (using degrees): [dip, dip angle]
    :return: analytical parameters for defining a fracture
    """
    if occurrence[1] == 0:
        a = b = 0
    else:
        ab = math.tan(math.radians(occurrence[1]))
        if ab == 0:
            a = b = 0
        elif occurrence[0] == 0:
            a = 0
            b = ab
        elif occurrence[0] == 180:
            a = 0
            b = - ab
        else:
            if 0 < occurrence[0] < 180:
                a = np.sqrt(ab / (1 + (math.tan(math.radians(90 - occurrence[0]))) ** 2))
            else:
                a = - np.sqrt(ab / (1 + (math.tan(math.radians(90 - occurrence[0]))) ** 2))
            b = a * math.tan(math.radians(90 - occurrence[0]))
    d = - a * center[0] - b * center[1] - center[2]
    return a, b, d


def distances(point, a, b, d):
    """
    :param point: point with a 3D coordinate
    :param a:
    :param b:
    :param d:
    :return:distance between a point and a fracture
    """
    distance = abs(a * point[0] + b * point[1] + point[2] + d) / np.sqrt(a ** 2 + b ** 2 + 1)
    return distance


def subransac(Data2, numfractures, sigma, resects=0):
    """
    Monte Carlo fracture network generation from a certain micro-seismic event cloud
    :param Data2: effective micro-seismic events list from DBSCAN
    :param numfractures: number of fractures defined
    :param sigma: distance threshold to determine whether a point belongs to a fracture or not
    :param resects: abandoned parameter (may be used for further research, default setting is 0)
    :return: fracture parameters: including positioning point, dip and dip angle
    """
    lines = 0  # number of fractures at present
    ini_data2 = len(Data2)
    ini_Data2 = []
    for aa in Data2:
        if aa not in ini_Data2:
            ini_Data2.append(aa)
    center_a = []
    num_reach = False
    while len(Data2) >= approximate465(ini_data2 * 0.05, 0):
        chose, bestfit = [], []
        total_distance = 10 ** 15
        if numfractures == 1:
            ut = 1
        else:
            ut = 20
        orientations = loadDataSet('prior_orientation.txt', ',')
        dips, angles = [], []
        for ucuori in orientations:
            if len(ucuori) == 4:
                dips.append(ucuori[:2])
                angles.append(ucuori[2:])
            else:
                print('ValueError')
                exit(1)
        for i in range(ut):
            rcenter = Data2[random.sample(range(len(Data2)), 1)[0]][:3]
            angle = random.randint(int(angles[(lines + resects) % len(dips)][0]),
                                   int(angles[(lines + resects) % len(dips)][1]))
            dip = random.randint(int(dips[(lines + resects) % len(dips)][0]),
                                 int(dips[(lines + resects) % len(dips)][1]))
            if dip >= 360:
                dip -= 360
            rcenter.append(dip)
            rcenter.append(angle)
            a, b, d = proj(rcenter[:3], rcenter[3:])
            fitted = []
            ats_dist2 = 0
            for t in range(len(ini_Data2)):
                distance = distances(ini_Data2[t], a, b, d)
                ats_dist2 += distance  # 点到裂缝的距离
                if distance <= sigma and ini_Data2[t] in Data2:
                    fitted.append(ini_Data2[t])
            if ats_dist2 < total_distance:
                total_distance = ats_dist2
                center = rcenter  # best fit fracture parameter
                bestfit = fitted  # points fitted by the best fit fracture network
        # remove best fit fracture
        for tt in bestfit:
            if tt in Data2:
                Data2.remove(tt)
        center_a.append(center)
        lines += 1
        if lines == numfractures:  # two ending conditions: number of fractures and number of remaining micro-seismic
            # events, whichever comes first
            num_reach = True
            break
    return center_a, num_reach, lines


def dbscan(Data, Eps, MinPts):
    """
    DBSCAN method for generating effective micro-seismic points
    :param Data: original micro-seismic data list
    :param Eps: search radius
    :param MinPts: the minimum number of points to form a valid cluster
    :return: effective micro-seismic data list
    """
    num = len(Data)
    unvisited = [i for i in range(num)]  # unvisited points
    visited = []  # visited points
    C = [-1 for _ in range(num)]
    k = -1
    cluster = []  # record effective points
    cluspoints = 0
    while len(unvisited) > 0:
        p = random.choice(unvisited)
        unvisited.remove(p)
        visited.append(p)
        N = []
        for i in range(num):
            if dist(Data[i], Data[p]) <= Eps:
                N.append(i)
        if len(N) >= MinPts:
            k += 1
            C[p] = k
            for pi in N:
                if pi in unvisited:
                    unvisited.remove(pi)
                    visited.append(pi)
                    M = []
                    for j in range(num):
                        if dist(Data[j], Data[pi]) <= Eps:
                            M.append(j)
                    if len(M) >= MinPts:
                        for t in M:
                            if t not in N:
                                N.append(t)
                if C[pi] == -1:
                    C[pi] = k
            gt = 0
            for jc in N:
                if Data[jc] not in cluster:
                    cluster.append(Data[jc])
                    gt += 1
            cluspoints += gt
        else:
            C[p] = -1
    if k == -1:
        print('no valid cluster')
    else:
        print(f'dbscan cluster completed, with a total of {cluspoints} valid points ({cluspoints / num * 100}%)')
    return cluster


def dist(t1, t2):
    """
    distance of two points
    :param t1:
    :param t2:
    :return:
    """
    dis = math.sqrt((np.power((t1[0] - t2[0]), 2) + np.power((t1[1] - t2[1]), 2) + np.power((t1[2] - t2[2]), 2)))
    return dis


def singfracgen(opts, clc, sigma, iters):
    """
    multiply defined number of fractures Monte Carlo generation
    :param opts:
    :param clc: list of effective micro-seismic events
    :param sigma: distance threshold
    :param iters: number of attempts for the fracture generation under each of the number of fractures defined
    :return: best fit fracture network parameter or original data for elbow analysis
    """
    goal_function, fitted_points_3, distance_2, numfracture = [], [], [], []
    cycle = 0
    for num in range(opts[0], opts[1] + 1, opts[2]):
        fits = []
        numfracture.append(num)  # list of numbers of fractures
        pre_total = 0
        goal = 10 ** 15
        dstss, npots, scenter_a = [], [], []
        cycle += 1
        for j in range(iters):
            if j == 0 and cycle == 1:
                data = clc
            else:
                data = restore
            restore = copy.deepcopy(data)
            if j == 0:
                print(f'{len(data)} points received for fracture generation')
                print(f'attempt to fit {len(data)} microseismic events with {num} fractures')
            if (j + 1) % 20 == 0:
                print(f'{j + 1}th attempt for fitting using {num} fractures is being achieved')
            center_a, num_r, acnum = subransac(data, num, sigma)
            dista = 0
            judge, dtt = [0 for jt in range(len(restore))], [10 ** 15 for ch in range(len(restore))]
            for t in range(acnum):
                ca1, cb1, cd1 = proj(center_a[t][:3], center_a[t][3:])
                for f in range(len(restore)):
                    dst_s = distances(restore[f], ca1, cb1, cd1)
                    dista += dst_s
                    if dst_s <= sigma and dst_s < dtt[f]:
                        judge[f] = t + 1
                        dtt[f] = dst_s
            cufitted = []
            for t in range(acnum):
                for f in range(len(restore)):
                    if t == judge[f] - 1 and restore[f] not in cufitted:
                        cufitted.append(restore[f])
            dstss.append(dista / acnum / len(judge))
            npots.append(len(cufitted))
            fits.append(center_a)
            if dista / acnum / len(judge) < goal and (len(cufitted) > pre_total or len(cufitted) == len(judge)):
                goal = dista / acnum / len(judge)
                pre_total = len(cufitted)
                snum_r = num_r
                snum = acnum
        fitted_points_3.append(max(npots) / (len(judge)) * 100)
        goal_function.append(-np.median(dstss))
        befit = fits[npots.index(max(npots))]
        if opts[0] == opts[1]:
            while not len(befit) == num:
                npots.remove(max(npots))
                fits.remove(befit)
                befit = fits[npots.index(max(npots))]
            if snum_r:
                print(f'the best fit for the dataset with {num} fractures:')
            else:
                print(f'the best fit reached in advance with {snum} out of {num} fractures:')
            print('new_bfit=', befit)
        print(f'bfit rate={max(npots) / len(judge) * 100}%')
        print(f'bfit distance={dstss[npots.index(max(npots))]}')
    if not opts[0] == opts[1]:
        # if the maximum number of fractures equals to the minimum, output best fit fractures' parameters
        print('best=', fitted_points_3)
    cefit = copy.deepcopy(befit)
    return fitted_points_3, cefit, numfracture


# loading parameters and original micro-seismic events
opt = loadDataSet('option.txt', ',')
clc = []
with open('list.txt') as fr:
    for line in fr.readlines():
        curline = line.strip().split(',')
        clc.append(curline[0])
sigma, avery, points, iters = opt[1][0], opt[1][1], opt[1][2], int(opt[1][3])
opts = []
for cda in opt[0]:
    opts.append(int(cda))
raw = loadDataSet(f'{clc[0]}.txt', ',')
# eliminating ineffective micro-seismic events
rawdata = []
for cadda in raw:
    if cadda not in rawdata:
        rawdata.append(cadda)
print(f'{len(raw) - len(rawdata)} repeat points have eliminated and {len(rawdata)} points received for denoising')
data = dbscan(rawdata, avery, points)
be_fits, elbow_data = [], []
if opts[0] == opts[1]:  # if the maximum number of fractures equals to the minimum
    # generate best fit fractures
    num_frac_app = opts[0]
else:  # generation of original data for elbow analysis
    numfrac, cycles = [c_c for c_c in range(opts[0], opts[1] + 1, opts[2])], 10
    writeDataSet('fitted_rate.txt', numfrac, ',')
    writeDataSet('elbow_data.txt', numfrac, ',')
    for i_i in range(cycles):  # repeat 10 times
        print(f'{i_i + 1} out of {cycles}')
        restore = copy.deepcopy(data)
        b_fit, cef, numfrac = singfracgen(opts, data, sigma, iters)
        for dd in b_fit:
            be_fits.append(dd)
        writeDataSet('fitted_rate.txt', b_fit, ',')
        data = restore
        if i_i == cycles - 1:
            file2 = open('fitted_rate.txt', 'a')
            file2.write('\n')
            file2.close()
    for i_i_i in range(int((opts[1] - opts[0]) / opts[2]) + 1):  # elbow data generation
        mid_d = []
        for ctt in range(len(be_fits)):
            if ctt % ((opts[1] - opts[0]) / opts[2] + 1) == i_i_i:
                mid_d.append(be_fits[ctt])
        elbow_data.append(np.median(mid_d))  # data for elbow analysis
    writeDataSet('elbow_data.txt', elbow_data, ',')
    fig = plt.figure(figsize=(8, 6), dpi=80)
    ax = fig.add_subplot(111)
    plt.plot(numfrac, elbow_data)
    plt.show()
    num_frac_app = eval(input('please input an appropriate number of fractures:'))  # input the number of fractures
optc = [num_frac_app, num_frac_app, 1]
b_fit_r, b_fit_frac, aic = singfracgen(optc, data, sigma, iters)  # calculate best fracture generation
for actf in b_fit_frac:  # write best generation to file
    writeDataSet('best_fit.txt', actf, ',')
    if b_fit_frac.index(actf) == len(b_fit_frac) - 1:
        file = open('best_fit.txt', 'a')
        file.write('\n')
        file.close()
